home *** CD-ROM | disk | FTP | other *** search
- #! /usr/bin/perl -w
-
- =pod
-
- =head1 NAME
-
- bogoupgrade -- upgrade a bogofilter database to current version.
-
- =cut
-
- # bogofilter-0.3 through bogofilter-0.6.3
- #
- # HEADER "# bogofilter email-count (format version A): %lu"
- #
-
- # bogofilter-0.7.x
- #
- # HEADER "# bogofilter email-count (format version B): %lu"
- #
-
- # bogofilter-0.8 to bogofilter-0.13.x
- #
- # BerkeleyDB with double wordlists
- # spamlist.db and goodlist.db
-
- # bogofilter-0.14 and later
- #
- # BerkeleyDB with single wordlist
- # wordlist.db
-
- require 5.005_03;
- use strict;
- use filetest 'access';
- use Pod::Usage;
-
- sub convert_double_to_single();
- sub convert_format_A();
- sub convert_format_B();
- sub db_verify($);
-
- my $VERSION = '0.5.0';
-
# Settings collected from the command line.
my ($in, $out, $dir);

# Extra "-y <value>" argument handed to bogoutil.  It stays an empty string
# (not undef) when -y is absent, so interpolating it into a command line does
# not raise "uninitialized value" warnings under -w.
my $yday = '';

# Token under which the message count is written to the word list.  It is
# assigned here, at file scope, because convert_format_A() and
# convert_format_B() read this file-scoped variable; declaring it again with
# "my" inside the else branch below would only set a shadowing copy and leave
# the converters with an undefined token.
my $msg_count_token = '.MSG_COUNT';

my $bogoutil = 'bogoutil';

for (my $i = 0; $i < @ARGV; $i++) {
    my $arg = $ARGV[$i];

    if ($arg eq '-h' or $arg eq '--help') {
        pod2usage(-verbose => 1, -exitstatus => 0);
    }
    elsif ($arg eq '-d' or $arg eq '-i' or $arg eq '-o'
        or $arg eq '-b' or $arg eq '-y') {
        # Each of these options consumes the following argument; refuse to
        # continue when it is missing instead of silently storing undef.
        pod2usage("Option $arg requires an argument.") unless $i < $#ARGV;
        my $value = $ARGV[++$i];

        if    ($arg eq '-d') { $dir      = $value; }
        elsif ($arg eq '-i') { $in       = $value; }
        elsif ($arg eq '-o') { $out      = $value; }
        elsif ($arg eq '-b') { $bogoutil = $value; }
        else                 { $yday     = "-y $value"; }
    }
    else {
        pod2usage(-verbose => 0, -exitstatus => 1);
    }
}

# Database file extension; overridable through the DB_EXT environment variable.
my $db_ext = $ENV{DB_EXT} || 'db';

if ($dir) {
    # Directory mode: merge spamlist/goodlist into a single wordlist.
    if ($in || $out) {
        pod2usage("Cannot use -d at the same time as -i or -o!");
    }
    convert_double_to_single();
}
else {
    # File mode: the first line of the input text file selects the converter.
    pod2usage("Missing input filename.\nAborting") unless $in;
    pod2usage("Missing output filename.\nAborting") unless $out;

    open(F, "< $in") or die "Cannot open input file [$in]. $!.\nAborting";
    my $sig = <F>;
    $sig = '' unless defined $sig;    # empty input file: no signature line
    chomp($sig);

    # The converters read the message count from $1, so each one must be
    # called directly inside the branch whose match captured it.
    if ($sig =~ m/^\# bogofilter wordlist \(format version A\):\s(\d+)$/) {
        convert_format_A();
    }
    elsif ($sig =~ m/^\# bogofilter email-count \(format version B\):\s(\d+)/) {
        convert_format_B();
    }
    else {
        # Replace everything outside the listed character range before
        # echoing the unrecognized signature back to the user.
        $sig =~ y/[\040-\177]/_/cs;
        warn "Cannot recognize signature [$sig].\n";
        exit(2);
    }
}
-
# Check that database file $f is readable and passes "bogoutil --db-verify".
# args: database filename
# Dies when the file is unreadable, when bogoutil cannot be started, or when
# bogoutil reports a non-zero status for the database.
sub db_verify($) {
    my ($f) = @_;

    if (!-r $f) {
        die "Database $f is not readable: $!\nAborting";
    }

    # List form of system(): the file name is passed as a single argument
    # without going through a shell.
    my $ret = system($bogoutil, '--db-verify', $f);

    # -1 means the program could not be run at all; do not blame the
    # database for that.
    die "Cannot run $bogoutil: $!\nAborting" if $ret == -1;
    die "Database $f is corrupt.\nAborting" if $ret;
}
-
# Convert a "format version A" text file: every remaining line of the already
# opened input handle F is piped to "bogoutil -l", followed by one record
# holding the message count.
# The count is taken from $1, i.e. from the signature match the caller has
# just performed, so this sub must be called straight after that match.
sub convert_format_A() {
    my $msg_count = $1;
    my $loader    = "$bogoutil $yday -l $out";

    open(OUT, "| $loader")
        or die "Cannot run command \"$loader\": $!\nAborting";

    # Copy the word records through unchanged.
    print OUT $_ while <F>;

    # Append the message count as its own record.
    print OUT "$msg_count_token $msg_count\n";

    # Closing a pipe waits for the command and reports its failure.
    close(OUT)
        or die "Error executing command \"$loader\": $!\nAborting";
    close(F);
}
-
# Convert a "format version B" data set: the text file only carries the
# message count, the word records live in a database file whose name is the
# input file name with a trailing "count" replaced by the database extension.
# The database is dumped with "bogoutil -d" and reloaded with "bogoutil -l",
# with the message count appended as a record of its own.
# The initial count is taken from $1, i.e. from the signature match the
# caller has just performed.
sub convert_format_B() {
    # Must come first: the substitution and matches below overwrite $1.
    my $msg_count = $1;
    my $in_db     = $in;

    $in_db =~ s/count$/$db_ext/;

    unless (-f $in_db) {
        warn("Cannot find database file [$in_db]\n corresponding to input file [$in]\n");
        # Non-zero status so that callers can tell the conversion failed.
        exit(2);
    }

    # The text file is no longer needed; F is reused for the dump pipe.
    close(F);

    # Verify the input before starting the loader, so that a bad database
    # does not leave a loader process running on an empty stream.
    db_verify($in_db);

    my $load_cmd = "$bogoutil $yday -l $out";
    open(OUT, "| $load_cmd")
        or die "Cannot run command \"$load_cmd\": $!\nAborting";

    my $dump_cmd = "$bogoutil $yday -d $in_db";
    open(F, "$dump_cmd |")
        or die "Cannot run command \"$dump_cmd\": $!\nAborting";

    while (<F>) {
        if (m/^\.count\s+(\d+)$/) {
            # A count stored in the database wins over the text file's count.
            warn("Found a message count of [$1] in db.\nThrowing away text file count of [$msg_count]\n");
            $msg_count = $1;
            next;
        }
        elsif (/^\Q$msg_count_token\E\s(\d+)$/) {
            # \Q...\E: the token is matched literally (it contains a dot).
            warn("This database appears to have been upgraded already.\nBut there's no harm in doing it again.\n");
            $msg_count = $1;
            next;
        }
        print OUT $_;
    }
    print OUT "$msg_count_token $msg_count\n";

    # Closing a pipe waits for the command; report failures of either side.
    close(F)
        or die "Error executing command \"$dump_cmd\": $!\nAborting";
    close(OUT)
        or die "Error executing command \"$load_cmd\": $!\nAborting";
}
-
# Dump database $filename with "bogoutil -d" and return one string per dumped
# line, built by applying the sprintf $format to the first three
# whitespace-separated fields of that line (presumably word, count and date;
# verify against bogoutil's dump format).
# args: filename format
# Must be called in list context.  Dies when the database fails db_verify()
# or when the dump command cannot be run or reports failure.
sub cvt2to1_get($$) {
    my ($filename, $format) = @_;
    my @tmp;
    my $cmd;

    die "need array context in cvt2to1_get.\nAborting" unless wantarray;
    db_verify($filename);
    $cmd = "$bogoutil -d $filename";
    open(I, "$cmd |") or die "cannot run \"$cmd\": $!\nAborting";
    while (<I>) {
        chomp;
        my @a = split;
        # A blank line has no fields; formatting it would produce a bogus
        # record from undefined values.
        next unless @a;
        push @tmp, sprintf($format, $a[0], $a[1], $a[2]);
    }
    close I or die "error running \"$cmd\": $!\nAborting";
    return @tmp;
}
-
# Directory mode: read spamlist and goodlist from $dir, rewrite each record
# into the three-column layout (spam records keep their count in the first
# numeric column, good records in the second) and load the sorted result into
# a new wordlist via "bogoutil -l".
# Dies when the wordlist already exists or when a command fails.
sub convert_double_to_single() {
    my $target = "$dir/wordlist.$db_ext";

    # Source database paired with the sprintf layout for its records.
    my @sources = (
        [ "$dir/spamlist.$db_ext", "%s %d 0 %d" ],
        [ "$dir/goodlist.$db_ext", "%s 0 %d %d" ],
    );

    die "$target already exists.\nAborting" if -e $target;

    my @records;
    foreach my $source (@sources) {
        push @records, cvt2to1_get($source->[0], $source->[1]);
    }

    my $loader = "$bogoutil -l $target";
    open(O, "| $loader") or die "cannot run \"$loader\": $!\nAborting";

    my $payload = join("\n", sort @records) . "\n";
    print O $payload;

    close O or die "error executing \"$loader\": $!\nAborting";
}
-
- exit(0); # reached once the conversion selected above has returned without dying or exiting
-
- __END__
- =pod
-
- =head1 SYNOPSIS
-
- bogoupgrade [options] -d <bogofilter directory>
- bogoupgrade [options] -i <input text file> -o <output db file>
- bogoupgrade {-h|--help}
-
- Options:
- -b <path to bogoutil>
-
- =head1 DESCRIPTION
-
- B<bogoupgrade> updates bogofilter databases from older formats to the current
- format.
-
- =head1 OPTIONS
-
- =over 8
-
- =item B<-d <directory>>
-
- Name of directory containing database files. Old files will be read and new
- files will be written.
-
- =item B<-i <input file>>
-
- Text file containing message count, and possibly wordlist data records. If the
- file only contains a message count but no word list records, there must be a
- database file, in the same directory as the text file, which contains the word
- list data.
-
- =item B<-o <output file>>
-
- Output database file. Use the appropriate file extension for your version of
- bogofilter, i.e. '.db' for Berkeley DB and SQLite3, or '.qdbm' for QDBM.
-
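- =item B<-b <path to bogoutil program>>
-
- Defaults to 'bogoutil', in the hopes that your shell will find it.
-
- =item B<-y <value>>
-
- Passed through unchanged to bogoutil as its B<-y> option when converting
- with B<-i> and B<-o>. It is not used in B<-d> mode.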
-
- =item B<-h> | B<--help>
-
- Print the help - you are currently reading it.
-
- =back
-
- =head1 AUTHORS
-
- Gyepi Sam <gyepi@praxis-sw.com>
- David Relson <relson@osagesoftware.com>
- Matthias Andree <matthias.andree@gmx.de>
-
- =cut
-
- # vim: set filetype=perl ai:
-